Cyber operations dataset

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as mn
import zipfile
import plotly
import plotly.graph_objs as go
import plotly.express as px
from plotly.subplots import make_subplots
from bs4 import BeautifulSoup
import requests
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import warnings
warnings.filterwarnings('ignore')
# IPython shell escape: fetch the dataset archive with the kaggle command-line tool.
!kaggle datasets download -d "fireballbyedimyrnmom/cyber-incidents-up-to-2020"
cyber-incidents-up-to-2020.zip: Skipping, found more recently modified local copy (use --force to force download)
# Unzip the downloaded dataset into the working directory. The context
# manager closes the archive's file handle as soon as extraction finishes.
with zipfile.ZipFile('cyber-incidents-up-to-2020.zip') as archive:
    archive.extractall()

# Load the extracted incidents table and preview its first rows.
cyber = pd.read_csv('./cyber-operations-incidents.csv')
cyber.head()
| Title | Date | Affiliations | Description | Response | Victims | Sponsor | Type | Category | Sources_1 | Sources_2 | Sources_3 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Attack on Austrian foreign ministry | 2/13/2020 | Turla | The suspected Russian hackers conducted a week... | Confirmation https://www.theregister.co.uk/2... | Austrian Foreign Ministry | Russian Federation | Espionage | Government | https://www.theregister.co.uk/2020/02/14/austr... | https://www.bmeia.gv.at/en/the-ministry/press/... | NaN |
| 1 | Spear-phishing campaign against unnamed U.S. g... | 1/23/2020 | Konni Group | The suspected North Korean threat actor Konni ... | NaN | Employees of the U.S. government | Korea (Democratic People's Republic of) | Espionage | Government | https://unit42.paloaltonetworks.com/the-fractu... | NaN | NaN |
| 2 | Australian Signals Directorate | 4/6/2020 | NaN | Responsible for attacking infrastructure that ... | NaN | NaN | Australia | Data destruction | Private sector | https://www.minister.defence.gov.au/minister/l... | https://www.zdnet.com/article/australia-on-the... | NaN |
| 3 | Catfishing of Israeli soldiers | 2/16/2020 | APT-C-23 | The Hamas-associated threat actor APT-C-23 tar... | Hack Back https://www.bleepingcomputer.com/n... | Israeli Defense Forces (IDF) soldiers | Palestine, State of | Espionage | Military | https://www.bleepingcomputer.com/news/security... | https://research.checkpoint.com/2020/hamas-and... | NaN |
| 4 | Targeting of U.S. companies and government age... | 8/10/2020 | Fox Kitten | Iranian hackers attacked high-end networking e... | NaN | U.S. government agencies, U.S. companies | Iran (Islamic Republic of) | Espionage | Government, Private sector | https://www.cybersafe.news/fbi-warns-about-ira... | https://www.zdnet.com/article/fbi-says-an-iran... | NaN |
# Print the index range, per-column non-null counts, dtypes and memory usage.
cyber.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 481 entries, 0 to 480 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Title 481 non-null object 1 Date 474 non-null object 2 Affiliations 347 non-null object 3 Description 481 non-null object 4 Response 86 non-null object 5 Victims 453 non-null object 6 Sponsor 439 non-null object 7 Type 447 non-null object 8 Category 458 non-null object 9 Sources_1 475 non-null object 10 Sources_2 355 non-null object 11 Sources_3 168 non-null object dtypes: object(12) memory usage: 45.2+ KB
# Summary statistics for every column; include='all' covers the non-numeric columns too.
cyber.describe(include='all')
| Title | Date | Affiliations | Description | Response | Victims | Sponsor | Type | Category | Sources_1 | Sources_2 | Sources_3 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 481 | 474 | 347 | 481 | 86 | 453 | 439 | 447 | 458 | 475 | 355 | 168 |
| unique | 477 | 403 | 262 | 479 | 80 | 387 | 39 | 7 | 24 | 450 | 343 | 164 |
| top | Targeting of Ukrainian government entities | 10/4/2018 | Believed to be the work of APT 28 | Gamaredon, a Russian-speaking APT, targeted Uk... | Criminal charges https://www.justice.gov/opa... | United States | China | Espionage | Private sector | https://blogs.microsoft.com/on-the-issues/2020... | https://blog.malwarebytes.com/threat-analysis/... | https://www.washingtonpost.com/world/national-... |
| freq | 4 | 6 | 15 | 3 | 2 | 21 | 167 | 374 | 128 | 4 | 2 | 2 |
# Show the column labels of the incidents table.
cyber.columns
Index(['Title', 'Date', 'Affiliations', 'Description', 'Response', 'Victims',
'Sponsor', 'Type', 'Category', 'Sources_1', 'Sources_2', 'Sources_3'],
dtype='object')
# Report the total number of cells and the (rows, columns) dimensions.
print(f"Size = {cyber.size}")
print(f"Shape = {cyber.shape}")
Size = 5772 Shape = (481, 12)
# Number of missing entries in each column
cyber.isna().sum()
Title 0 Date 7 Affiliations 134 Description 0 Response 395 Victims 28 Sponsor 42 Type 34 Category 23 Sources_1 6 Sources_2 126 Sources_3 313 dtype: int64
# Share of missing entries in each column, as a percentage rounded to 2 decimals
(cyber.isna().sum() / len(cyber) * 100.00).round(2)
Title 0.00 Date 1.46 Affiliations 27.86 Description 0.00 Response 82.12 Victims 5.82 Sponsor 8.73 Type 7.07 Category 4.78 Sources_1 1.25 Sources_2 26.20 Sources_3 65.07 dtype: float64
# Draw the missingno matrix plot of the incidents table on a wide 25x10 figure.
mn.matrix(cyber, figsize=(25,10))
<AxesSubplot:>
# Two-column table: column name -> percentage of missing entries.
null_pct = cyber.isnull().sum() * 100 / cyber.shape[0]
cyber_null = null_pct.reset_index()
cyber_null.columns = ['Column Name', 'Null Values Percentage']

# Point plot of the percentages with a dashed red reference line at 50.
fig = plt.figure(figsize=(10, 5))
ax = sns.pointplot(
    data=cyber_null,
    x="Column Name",
    y="Null Values Percentage",
    color='blue',
)
ax.axhline(50, color='red', ls='--')
plt.xticks(fontsize=7, rotation=90)
plt.title("Percentage of NULL values in cyberattack data")
plt.xlabel("Columns")
plt.ylabel("Null Values (%)")
plt.show()
# Single-row heatmap of the per-column null percentages.
fig, ax = plt.subplots(figsize=(20, 2))

# Index the table by column name so the names become the heatmap's x labels.
cyber_null = cyber_null.set_index(cyber_null['Column Name'])
null_pct_row = cyber_null[["Null Values Percentage"]].T
sns.heatmap(null_pct_row, cmap='coolwarm')
plt.show()
# Count incidents per victim category. A row may list several categories
# separated by commas, so each base category is counted in every row whose
# Category text mentions it.
cyber_no_null_cat = cyber[cyber["Category"].notna()]

# Base categories are the distinct values of rows naming a single category
# (no comma in the text).
is_single_category = ~cyber_no_null_cat['Category'].str.contains(',', regex=False)
cyber_list = cyber_no_null_cat.loc[is_single_category, "Category"].unique().tolist()

cyber_category = list(cyber_list)
# regex=False: the category name is matched as literal text, so any regex
# metacharacter in a name cannot alter or break the match.
cat_count = [
    int(cyber_no_null_cat['Category'].str.contains(str(name), regex=False).sum())
    for name in cyber_list
]

cyber_new_cat = pd.DataFrame({'Category': cyber_category, 'Count': cat_count})
cyber_new_cat = cyber_new_cat.sort_values('Count', ascending=False)
cyber_new_cat
| Category | Count | |
|---|---|---|
| 0 | Government | 240 |
| 1 | Private sector | 232 |
| 3 | Civil society | 103 |
| 2 | Military | 60 |
# Bar chart of incident counts per category, one colour per category.
fig = px.bar(
    cyber_new_cat,
    x='Category',
    y='Count',
    color='Category',
    title='Cyberattack Category',
)
fig.update_traces(
    opacity=0.75,
    width=0.5,
    marker_line_width=1,
    marker_line_color='black',
)
fig.show()

# The same counts as a pie chart.
fig = px.pie(
    cyber_new_cat,
    names='Category',
    values='Count',
    title='Cyberattacks Category',
    height=450,
    width=450,
)
fig.show()
# Convert the Date column from strings to datetimes so it can serve as a time axis.
cyber['Date'] = pd.to_datetime(cyber['Date'])

# One line per attack type over time; hovering a point shows the incident title.
fig = px.line(
    cyber,
    x='Date',
    y='Type',
    color="Type",
    hover_name="Title",
    title='Cyberattack types over time',
)
fig.show()
# Drop the "of America" suffix from sponsor names (presumably to shorten
# "United States of America" -- confirm against the data). The replacement is
# literal, and the space left in front of the removed text is stripped so the
# shortened name carries no trailing whitespace.
cyber['Sponsor'] = (
    cyber['Sponsor']
    .str.replace('of America', '', regex=False)
    .str.strip()
)

# Frequency table of the raw Sponsor values, most common first.
cyber_sponsor = cyber['Sponsor'].value_counts().reset_index()
cyber_sponsor.columns = ['Sponsor', 'Count']
cyber_sponsor
# Count incidents per sponsor. A row may list several sponsors separated by
# commas, so each base sponsor is counted in every row whose Sponsor text
# mentions it.
sponsor_no_null_cat = cyber[cyber["Sponsor"].notna()]

# Base sponsors are the distinct values of rows without a comma.
# NOTE(review): a single sponsor whose own name contains a comma (e.g.
# "Palestine, State of") is excluded by this heuristic -- confirm whether
# that is acceptable.
is_single_sponsor = ~sponsor_no_null_cat['Sponsor'].str.contains(',', regex=False)
sponsor_list = sponsor_no_null_cat.loc[is_single_sponsor, "Sponsor"].unique().tolist()

sponsor_new_list = list(sponsor_list)
# regex=False is essential here: names such as "Iran (Islamic Republic of)"
# contain parentheses, which a regex would read as a capture group and so
# never match the literal name.
sponsor_new_count = [
    int(sponsor_no_null_cat['Sponsor'].str.contains(str(name), regex=False).sum())
    for name in sponsor_list
]

sponsor_new_cat = pd.DataFrame({'Sponsor': sponsor_new_list, 'Count': sponsor_new_count})
sponsor_new_cat = sponsor_new_cat.sort_values('Count', ascending=False)

# Pie chart of the ten sponsors with the most incidents.
fig = px.pie(sponsor_new_cat.head(10), values='Count', names='Sponsor', title='Sponsor of Cyberattack')
fig.show()
# Select incidents whose victims mention Ukraine, whose sponsor mentions
# Russia and whose category includes Government. na=False makes rows with a
# missing value count as "no match", so each mask is strictly boolean;
# regex=False matches the words literally.
is_ukraine_victim = cyber['Victims'].str.contains('Ukraine', regex=False, na=False)
is_russia_sponsor = cyber['Sponsor'].str.contains('Russia', regex=False, na=False)
is_government = cyber['Category'].str.contains('Government', regex=False, na=False)
russ_att_ukr = cyber[is_ukraine_victim & is_russia_sponsor & is_government]

# Drop the source links and the sparsely filled columns to keep the table readable.
russ_att_ukr = russ_att_ukr.drop(
    ['Sources_1', 'Sources_2', 'Sources_3', 'Response', 'Affiliations'],
    axis=1,
)
russ_att_ukr
| Title | Date | Description | Victims | Sponsor | Type | Category | |
|---|---|---|---|---|---|---|---|
| 182 | Attempt to compromise Ukraine's judicial system | 2018-12-04 | Ukraine’s security agency thwarted an attack a... | Ukraine | Russian Federation | Espionage | Government |
| 231 | Compromise of Ukrainian government | 2018-11-20 | A Russian actor tied to Russia’s Federal Secur... | Ukraine | Russian Federation | Espionage | Government |
| 252 | Bad Rabbit | 2017-10-24 | Using a tool called Bad Rabbit, a threat actor... | Ukraine, Japan, Russia, Bulgaria, Turkey | Russian Federation | Sabotage | Government |
| 259 | NotPetya | 2017-07-01 | Threat actors deploy a tool, called NotPetya, ... | Rosneft, WPP Plc., Maersk, Cie de Saint-Gobain... | Russian Federation | Data destruction | Government, Private sector |
| 331 | Gamaredon | 2015-04-28 | A Russian-linked threat group known as Gamared... | Ukraine | Russian Federation | Espionage | Government |
| 349 | Targeting of Ukrainian law enforcement and gov... | 2015-03-13 | A threat actor attempted to compromise Ukraini... | Ukraine | Russian Federation | Espionage | Military, Government |
| 361 | Crouching Yeti | 2014-07-31 | This threat actor targets companies in the edu... | United States, Spain, Japan, Germany, France, ... | Russian Federation | Espionage | Private sector, Government |
| 368 | APT 28 | 2014-10-28 | This threat actor is linked to espionage campa... | Georgia, NATO, OSCE, France, Ukraine, United K... | Russian Federation | NaN | Government, Military, Private sector |
| 369 | Sandworm | 2014-11-03 | This threat actor targets industrial control s... | Russia, Ukraine, Poland, Lithuania, Belarus, A... | Russian Federation | Espionage | Private sector, Government |
| 374 | Attempted compromise of Ukrainian email accounts | 2014-12-09 | A threat actor attempted to compromise email a... | Ukraine | Russian Federation | Espionage | Government, Military |
| 399 | The Dukes | 2013-02-01 | This threat actor targets government ministrie... | United States, Georgia, Brazil, China, Japan, ... | Russian Federation | Espionage | Government, Private sector |
| 405 | Red October | 2013-01-14 | This threat actor targets governments, diploma... | Russia, Kazakhstan, Azerbaijan, Belgium, India... | Russian Federation | Espionage | Government, Private sector |
# Calculate the time range of cyberattacks by Russia on Ukraine
attack_span = russ_att_ukr['Date'].max() - russ_att_ukr['Date'].min()
span_in_days = attack_span / pd.Timedelta(days=1)

# Convert the days to months and years.
# Months and years have no fixed length, and recent pandas versions reject
# the ambiguous np.timedelta64 'M' / 'Y' units, so the conversion uses the
# mean Gregorian year length explicitly.
days_per_year = 365.2425
days_per_month = days_per_year / 12

russ_att_ukr_days = span_in_days / days_per_month
print("Russia has been attacking Ukraine on the cyberfront for", round(russ_att_ukr_days), "months")
russ_att_ukr_days = span_in_days / days_per_year
print("That is close to", round(russ_att_ukr_days), "years")
Russia has been attacking Ukraine on the cyberfront for 71 months That is close to 6 years
url = 'https://www.csis.org/programs/strategic-technologies-program/significant-cyber-incidents'

# Web scraping using BeautifulSoup
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status stops us from parsing an HTTP error page as if it were data.
response = requests.get(url, timeout=30)
response.raise_for_status()
page = response.text
soup = BeautifulSoup(page, 'lxml')
results = soup.find('div', class_="layout-detail-page__main")
if results is None:
    raise RuntimeError(
        "Container 'layout-detail-page__main' not found; the page layout may have changed"
    )

# Keep (and print) only the paragraphs that mention both 'Ukraine' and 'Russia'
news_list = []
for paragraph in results.find_all('p'):
    text = paragraph.text
    if 'Ukraine' in text and 'Russia' in text:
        news_list.append(text)
        print(text, "\n")

# Converting into a dataframe
# Each kept entry reads "<Month> <Year>. <description>": everything before the
# first full stop is the date, the remainder is the description. Surrounding
# whitespace and newlines are removed and the date's inner whitespace is
# collapsed, so variants such as "April 2022 ." or a leading newline still
# give a clean date, and short paragraphs no longer raise an IndexError.
entries = []
for item in news_list:
    date_part, _, description = item.strip().partition('.')
    entries.append({
        'Date': ' '.join(date_part.split()),
        'Description': description.strip(),
    })
news_list_new = pd.DataFrame(entries, columns=['Date', 'Description'])
news_list_new
October 2022. Russian official, Vladimir Shin, accused the U.S. government and its
allies of a coordinated campaign of cyberattacks against Russia. Shin cited
comments from General Paul Nakasone confirming the U.S. "conducted a series
of operations" in response to Russia's invasion of Ukraine.
October 2022.
Hackers targeted Bulgarian websites belonging to the presidential
administration, the Defense Ministry, the Interior Ministry, the Justice
Ministry, and the Constitutional Court in a DDoS attack. A pro-Russian
hacking group claimed responsibility for the attack, stating it was
punishment “for betrayal to Russia and the supply of weapons to Ukraine.”
August 2022. Hackers targeted the website of Ukraine’s state energy agency responsible for the oversight of Ukraine’s nuclear power plants. The agency stated Russian hackers carried out the attack.
May 2022. Russian hackers hit Italian websites with a DDoS attack, including the Senate, the Ministry of Defence and the National Health Institute. The group states its goal was to target NATO countries and Ukraine.
April 2022. The Romanian National Directorate of Cyber Security said that multiple public and private sector websites were hit with DDoS attacks. The victims included the ministry of defense, border police, national railway company, and the OTP Bank. A group claiming credit for the attack said on Telegram that it hacked the websites because Romania supported Ukraine since the Russian invasion of the country.
April 2022 . Hackers targeted a Ukrainian energy facility, but CERT-UA and private sector assistance largely thwarted attempts to shutdown electrical substations in Ukraine. Researchers believe the attack came from the same group with ties to the Russian GRU that targeted Ukraine’s power grid in 2016, using an updated form of the same malware.
February 2022. The websites of the Ukrainian Cabinet of Ministers and Ministries of Foreign Affairs, Infrastructure, and Education were disrupted in the days before Russian troops invaded Ukraine. Wiper malware was also used to penetrate the networks of one Ukrainian financial institution and two government contractors.
| Date | Description | |
|---|---|---|
| 0 | \nOctober 2022 | Russian official, Vladimir Shin, accused the U... |
| 1 | \nOctober 2022\n | Hackers targeted Bulgarian websites belongi... |
| 2 | August 2022 | Hackers targeted the website of Ukraine’s stat... |
| 3 | May 2022 | Russian hackers hit Italian websites with a DD... |
| 4 | April 2022 | The Romanian National Directorate of Cyber Se... |
| 5 | April 2022 | . Hackers targeted a Ukrainian energy facility... |
| 6 | February 2022 | The websites of the Ukrainian Cabinet of Minis... |
# Getting key words from news_list_new Description column using nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

stop_words = set(stopwords.words('english'))

# Tokenise every description into a list of words
words = [word_tokenize(description) for description in news_list_new['Description']]

# Remove the stop words. Tokens are compared in lower case so capitalised
# stop words such as "The" at the start of a sentence are removed as well.
words_new = [
    [w for w in tokens if w.lower() not in stop_words]
    for tokens in words
]

# Remove punctuation and numbers by keeping purely alphabetic tokens only
words_new_new = [
    [w for w in tokens if w.isalpha()]
    for tokens in words_new
]

# Adding the words as new column to the dataframe
news_list_new['Key Words'] = words_new_new
news_list_new
| Date | Description | Key Words | |
|---|---|---|---|
| 0 | \nOctober 2022 | Russian official, Vladimir Shin, accused the U... | [Russian, official, Vladimir, Shin, accused, g... |
| 1 | \nOctober 2022\n | Hackers targeted Bulgarian websites belongi... | [Hackers, targeted, Bulgarian, websites, belon... |
| 2 | August 2022 | Hackers targeted the website of Ukraine’s stat... | [Hackers, targeted, website, Ukraine, state, e... |
| 3 | May 2022 | Russian hackers hit Italian websites with a DD... | [Russian, hackers, hit, Italian, websites, DDo... |
| 4 | April 2022 | The Romanian National Directorate of Cyber Se... | [The, Romanian, National, Directorate, Cyber, ... |
| 5 | April 2022 | . Hackers targeted a Ukrainian energy facility... | [Hackers, targeted, Ukrainian, energy, facilit... |
| 6 | February 2022 | The websites of the Ukrainian Cabinet of Minis... | [The, websites, Ukrainian, Cabinet, Ministers,... |